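# Generates paraphrased questions about one person from a set of templates and appends the question/answer pairs to a JSONL file.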
import json
import os
import re

# file=os.walk('/home/lxy/wxbdata/东北大学计算机学院师资介绍-黄鹏程')
# answer=[]
# for dir_path,dir_name,file_list in file:
#     for file_name in file_list: 
#         # print(dir_path,dir_name,file_name)
#         with open(os.path.join(dir_path,file_name)) as f:
#             doc=f.read()
#             doc=re.sub(r'\\n.{1,3}\\n','\\n\\n',doc)
#             answer.append((file_name[:-4],doc))



# Empty set; nothing else in the visible script reads or fills it.
# NOTE(review): looks like a leftover -- confirm before removing.
question_template = set()
# templates = set(["请告诉我有关东北大学的{teacher}教师的信息","我想了解一下东北大学的{teacher}，可以给我一些详细资料吗？","请提供一份有关{teacher}的简介","帮我介绍一下{teacher}","帮我介绍一下{teacher}",
# "介绍一下{teacher}",
# "我想了解一下{teacher}",
# "我想了解{teacher}",
# "{teacher}",
# "介绍{teacher}",
# "{teacher}个人信息",
# "{teacher}个人信息",
# "提供一些关于{teacher}的背景信息",
# "说说{teacher}",
# "描述一下{teacher}？",
# "{teacher}怎么样？",
# "{teacher}怎么样？",
# "{teacher}是谁？",
# "{teacher}是谁？",
# '{teacher}是谁','介绍一下{teacher}','东北大学{teacher}','{teacher}',
# '{teacher}有什么学术成就'
# ])

# Question phrasings used to ask about a person. "{teacher}" is the
# placeholder that is later replaced with the person's name.
# Kept as a set, so every distinct phrasing appears exactly once; variants
# that differ only by a trailing "？" are different strings and both stay.
templates = {
    "请告诉我有关东北大学的{teacher}的信息",
    "我想了解一下东北大学的{teacher}，可以给我一些详细资料吗？",
    "请提供一份有关{teacher}的简介",
    "帮我介绍一下{teacher}",
    "介绍一下{teacher}",
    "我想了解一下{teacher}",
    "我想了解{teacher}",
    "{teacher}",
    "介绍{teacher}",
    "{teacher}个人信息",
    "提供一些关于{teacher}的背景信息",
    "说说{teacher}",
    "描述一下{teacher}？",
    "{teacher}怎么样？",
    "{teacher}是谁？",
    "{teacher}是谁",
    "东北大学{teacher}",
    "{teacher}有什么学术成就",
}

# Build one question/answer record per template for a single person and
# append the records to the JSONL dataset, one JSON object per line.

# The name and the answer text are the same for every template, so they are
# assigned once here instead of on every loop iteration.
teacher='黄鹏程'
answer='东北大学22级硕士，在东北大学自然语言处理实验室开展约束机器翻译、大语言模型提示工程等工作。和穆永誉、肖春阳等合作，已投出coling2023一篇。'

# Mode 'a' appends: records already in the file are kept, so running the
# script again adds the same records a second time.
with open('/home/lxy/wxbdata/db/神秘的.jsonl','a',encoding='utf-8') as o:
    # `templates` is a set of strings, whose iteration order can differ from
    # one interpreter run to the next; sorting makes the output order
    # reproducible.
    for s in sorted(templates):
        question=s.format(teacher=teacher)
        record={'question':question,'answer':answer}
        # ensure_ascii=False writes the Chinese text as-is instead of \uXXXX
        # escape sequences.
        o.write(json.dumps(record,ensure_ascii=False,allow_nan=False)+'\n')

